library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
                      message = FALSE, cache.lazy = FALSE, cache.path = "../../gitignore/RmdCache/", # keep heavy data in gitignore cache
                      fig.path = "Rmdfig/")
machine="mythinkpad" # define the machine we work on
loadALL = TRUE # load all uniteCov objects
loadannot = TRUE # load genome annotations
sourceDMS = TRUE # Load the calculated DMS
sourceSubUnite = FALSE
source("R02.3_DATALOAD.R")

## So far, we don't use DMR (because RRBS single end, this is not super meaningful)
rm(DMRBPlist, DMRlist)

Data getting loaded:

1 Global methylation and fitness

Compare fitness traits between the different offsprings groups. Follow up of Sagonas 2020 & Ferre Ortega’s master’s dissertation

1.1 Calculate BCI

Kaufmann et al. 2014: Body condition of the G2 fish, an estimate of fish health and a predictor of energy reserves and reproductive success, was calculated using there residuals from the regression of body mass on body length (Chellappaet al.1995).

fullMetadata_OFFS$BCI <- residuals(lmer(Wnettofin ~ Slfin * Sex + (1|brotherPairID), data=fullMetadata_OFFS))

## and for parents (no sex difference, only males):
fullMetadata_PAR$BCI <- residuals(lmer(Wnettofin ~ Slfin + (1|brotherPairID), data=fullMetadata_PAR))

Effect of paternal treatment on body condition of offspring: Kaufmann et al. 2014: “To investigate in which way paternal G1 exposure affected offspring tolerance, we tested how the relationship between G2 body condition and infection intensity was affected by paternal G1 exposure. This was tested in a linear mixed model on G2 body condition with paternal G1 treatment and the interaction between paternal G1 treatment and G2 infection intensity as fixed effects. Maternal half-sibship identity was set as a random effect”

1.2 Effect of paternal exposure on tolerance

1.2.1 BCI per trt

Effect of treatment groups of offspring on body condition(Kaufmann et al. 2014): “The linear mixed effect model (nlme function in R) included G2 body condition as dependent variable, sex, G2 treatment (exposed vs. control), paternal G1 treatment (exposed vs. control) and their interactions as fixed effects as well as maternal G2 half-sibship identity as a random effect”

mod1 <- lme(BCI ~ offsTrt * patTrt, random=~1|brotherPairID,data=fullMetadata_OFFS)
anova(mod1) # strong significant effect of both offspring trt & paternal + interactions
##                numDF denDF   F-value p-value
## (Intercept)        1   100  0.000000  1.0000
## offsTrt            1   100 14.057746  0.0003
## patTrt             1   100 13.600342  0.0004
## offsTrt:patTrt     1   100  9.396573  0.0028
mod1.2 <- lme(BCI ~  trtG1G2, random=~1|brotherPairID,data=fullMetadata_OFFS)
## pairwise posthoc test
emmeans(mod1.2, list(pairwise ~ trtG1G2), adjust = "tukey")
## $`emmeans of trtG1G2`
##  trtG1G2    emmean   SE df lower.CL upper.CL
##  NE_control   16.6 10.8  7    -8.87     42.0
##  NE_exposed  -57.7 11.0  7   -83.63    -31.8
##  E_control    23.6 10.8  7    -1.85     49.0
##  E_exposed    15.5 10.8  7    -9.90     41.0
## 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## 
## $`pairwise differences of trtG1G2`
##  1                       estimate   SE  df t.ratio p.value
##  NE_control - NE_exposed    74.29 15.3 100   4.840  <.0001
##  NE_control - E_control     -7.03 15.2 100  -0.462  0.9671
##  NE_control - E_exposed      1.03 15.2 100   0.067  0.9999
##  NE_exposed - E_control    -81.32 15.3 100  -5.298  <.0001
##  NE_exposed - E_exposed    -73.27 15.3 100  -4.773  <.0001
##  E_control - E_exposed       8.05 15.2 100   0.529  0.9517
## 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 4 estimates
## Control father - treatment offspring has a strongly significantly lower BC than
## every other group, same as Kaufmann et al. 2014

myplot1 <- ggplot(fullMetadata_OFFS, aes(x=trtG1G2, y = BCI, fill=trtG1G2))+
  geom_boxplot()+
  geom_signif(comparisons = list(c("NE_control", "NE_exposed")),
              map_signif_level=TRUE, annotations="***",
              y_position = 150, tip_length = 0, vjust=0.4) +
  geom_signif(comparisons = list(c("NE_exposed", "E_control")),
              map_signif_level=TRUE, annotations="***",
              y_position = 200, tip_length = 0, vjust=0.4) +
  geom_signif(comparisons = list(c("NE_exposed", "E_exposed")),
              map_signif_level=TRUE, annotations="***",
              y_position = 250, tip_length = 0, vjust=0.4) +
  scale_fill_manual(values = colOffs)+
  theme_bw() + theme(legend.position = "none") +
  ylab("Body Condition Index") +
  scale_x_discrete(labels=c("NE_control" = "G1 control\nG2 control", "NE_exposed" = "G1 control\nG2 infected",
                            "E_control" = "G1 infected\nG2 control", "E_exposed" = "G1 infected\nG2 infected"),
                   name = NULL)
myplot1

1.2.2 Tolerance (slope of BCI on worms count) per paternal status

mod_Tol <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol, reduce.random = F) # Model found: full model
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                            7 -608.33 1230.7                  
## (1 | brotherPairID)          0    6 -608.33 1228.7   0  1          1
## (1 | Sex)                    0    6 -608.33 1228.7   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##              Eliminated Sum Sq Mean Sq NumDF DenDF F value  Pr(>F)  
## No.Worms:PAT          0  20660   20660     1   111  6.1283 0.01481 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ No.Worms * PAT + (1 | brotherPairID) + (1 | Sex)
## The slope of BCI on nbrworms varies upon treatment
plot(ggpredict(mod_Tol, terms = c("No.Worms", "PAT")), add.data=T)+ theme_bw() +
  ylab("Body Condition Index") + xlab("Number of worms") +
  ggtitle("Predicted values of Body Condition Index in offspring")+
  scale_color_manual(NULL, values = c("black", "red")) +
  scale_fill_manual(NULL, values = c("black", "red"))  +
  scale_x_continuous(breaks = 0:10)+
  geom_point(size=0)+ # to have color key in legend as point
  guides(colour = guide_legend(override.aes = list(size=3,linetype=0, fill = NA)))

1.4 Nbr/Ratio of Methylated Sites in different groups

We kept in total 135 samples. On average, 11.27 [+/- 0.33] million reads were sequenced. The average mapping efficiency was 85% [+/- 0.48%].

The mean coverage per CpG site in the full dataset, considering positions covered in all fish, is 83 [+/- 2.21].

For G1 only: We kept in total 24 samples. On average, 11.16 [+/- 0.67] million reads were sequenced. The average mapping efficiency was # 84% [+/- 1.39%].

The mean coverage per CpG site in G1, considering positions shared by at least 6 fish per treatment group (half individuals per group), and which overlap with positions retained in G2, is 46 [+/- 1.71].

For G2 only: We kept in total 111 samples. On average, 11.29 [+/- 0.38] million reads were sequenced. The average mapping efficiency was 86% [+/- 0.47%].

The mean coverage per CpG site in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, is 47 [+/- 0.76].

1.5 Choice of global methylation value

cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$Nbr_methCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$Nbr_methCpG
## S = 350, p-value = 2.153e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8478261
## S = 350, p-value = 2.15e-06, rho = 0.85
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

## Check after RMS correction for coverage bias: CORRECTED (p-value = 0.4485)
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$RMS_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$RMS_coveredCpG
## S = 2712, p-value = 0.4006
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1791304
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

## and with residuals: COMPLETELY CORRECTED p-value = 0.9562
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG
## S = 2308, p-value = 0.9886
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##          rho 
## -0.003478261
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

############
## Offspring:
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$Nbr_methCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$Nbr_methCpG
## S = 20254, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9111355
## S = 20254, p-value < 2.2e-16 rho = 0.91
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Number of methylated cytosines") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## Plot distance to residuals:
fit <- lm(Nbr_methCpG ~ Nbr_coveredCpG, data = fullMetadata_OFFS)
plotdf <- fullMetadata_OFFS
plotdf$predicted <- predict(fit)   # Save the predicted values
plotdf$residuals <- residuals(fit)
ggplot(plotdf, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_segment(aes(xend = Nbr_coveredCpG, yend = predicted), col = "grey") +
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Number of methylated cytosines") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## Check after RMS correction for coverage bias: SEMI CORRECTED (p-value = 0.01, rho = -0.24)
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$RMS_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$RMS_coveredCpG
## S = 282466, p-value = 0.01158
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2393208
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  geom_smooth(method = "lm", col="black")+
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## and with residuals: COMPLETELY CORRECTED p-value = 0.51
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG
## S = 213674, p-value = 0.514
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.06250439
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  geom_smooth(method = "lm", col="black")+
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Residuals of number of methylated cytosines\n on number of cytosines covered") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

1.6 Why we should we correct for sex

1.6.1 No difference in mappability (p>0.05)

mod = lm(MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=203.17
## MappingEfficiency.BSBoldvsGynogen ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## <none>              667.74 203.18
## - Sex   1    22.563 690.30 204.86
## 
## Call:
## lm(formula = MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9847 -1.5030  0.3133  2.0418  4.0823 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  86.3134     0.3337 258.624   <2e-16 ***
## SexM         -0.9017     0.4699  -1.919   0.0576 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.475 on 109 degrees of freedom
## Multiple R-squared:  0.03269,    Adjusted R-squared:  0.02381 
## F-statistic: 3.683 on 1 and 109 DF,  p-value: 0.05758
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is WITH unknown and sex chromosomes, before filtering.

1.6.2 No difference in number of reads (p>0.05)

mod = lm(M.Seqs_rawReads ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=158.08
## M.Seqs_rawReads ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## - Sex   1    3.8569 448.66 157.04
## <none>              444.80 158.08
## 
## Step:  AIC=157.04
## M.Seqs_rawReads ~ 1
## 
## Call:
## lm(formula = M.Seqs_rawReads ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4901 -1.1901 -0.2901  0.8599  8.8099 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.2901     0.1917    58.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.02 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is WITH unknown and sex chromosomes, before filtering.

1.6.3 No difference in mean coverage per CpG in the filtered dataset (p>0.05)

mod = lm(MeanCoverage ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=314.87
## MeanCoverage ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## - Sex   1    4.4425 1831.0 313.14
## <none>              1826.5 314.87
## 
## Step:  AIC=313.14
## MeanCoverage ~ 1
## 
## Call:
## lm(formula = MeanCoverage ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8662 -2.4123 -0.6565  1.7138 15.0594 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  46.8858     0.3872   121.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.08 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.4 No difference in number of sites covered in the filtered dataset (p>0.05)

mod = lm(Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=2579.96
## Nbr_coveredCpG ~ Sex
## 
##        Df  Sum of Sq        RSS    AIC
## - Sex   1 1.9317e+10 1.3495e+12 2579.6
## <none>               1.3302e+12 2580.0
## 
## Step:  AIC=2579.56
## Nbr_coveredCpG ~ 1
## 
## Call:
## lm(formula = Nbr_coveredCpG ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -338266  -58536   26350   82661  139983 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   853897      10513   81.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 110800 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.5 No difference in number of sites covered in the filtered dataset (p>0.05)

mod = lm(OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=154.63
## OverallPercentageMethylation ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## <none>              431.18 154.63
## - Sex   1    12.428 443.61 155.78
## 
## Call:
## lm(formula = OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9467 -1.2674 -0.4101  0.6060 10.3697 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  56.1035     0.2682 209.196   <2e-16 ***
## SexM         -0.6692     0.3776  -1.772   0.0791 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.989 on 109 degrees of freedom
## Multiple R-squared:  0.02801,    Adjusted R-squared:  0.0191 
## F-statistic: 3.142 on 1 and 109 DF,  p-value: 0.07911
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.6 Males have a lower global methylation than females (residuals of nbr of methylated sites by nbr of sites covered)

mod = lm(res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod)) # sex is significant p = 0.000157 ***
## Start:  AIC=2165.93
## res_Nbr_methCpG_Nbr_coveredCpG ~ Sex
## 
##        Df  Sum of Sq        RSS    AIC
## <none>               3.1917e+10 2165.9
## - Sex   1 4493411296 3.6411e+10 2178.6
## 
## Call:
## lm(formula = res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -38952 -10289   -519  10434  45882 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6420       2307   2.782 0.006361 ** 
## SexM          -12726       3248  -3.917 0.000157 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17110 on 109 degrees of freedom
## Multiple R-squared:  0.1234, Adjusted R-squared:  0.1154 
## F-statistic: 15.35 on 1 and 109 DF,  p-value: 0.0001565
anova(mod)
## Analysis of Variance Table
## 
## Response: res_Nbr_methCpG_Nbr_coveredCpG
##            Df     Sum Sq    Mean Sq F value    Pr(>F)    
## Sex         1 4.4934e+09 4493411296  15.345 0.0001565 ***
## Residuals 109 3.1917e+10  292820190                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(ggpredict(mod, terms = c("Sex")), add.data = T) +
  xlab(NULL)+
  ylab("Residuals of N methylated sites on N covered sites") +
  ggtitle("Predicted values of global methylation in offspring")

1.7 Are mean residuals meth sites different following tolerance slope?

1.7.1 In all offspring

fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG_div1000 <- (fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG)/1000

mod_Tol.Meth <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
                     data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol.Meth, reduce.random = F) # Model found: BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                           11 -606.89 1235.8                  
## (1 | brotherPairID)          0   10 -606.89 1233.8   0  1          1
## (1 | Sex)                    0   10 -606.89 1233.8   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                     Eliminated  Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT          1  2780.8  2780.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT                   2  2396.3  2396.3
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms              3  1829.8  1829.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                       4  2571.6  2571.6
## No.Worms:PAT                                                 0 20659.8 20659.8
##                                                     NumDF DenDF F value  Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT     1   111  0.8465 0.35953
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT              1   111  0.7240 0.39667
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms         1   111  0.5492 0.46020
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                  1   111  0.7681 0.38270
## No.Worms:PAT                                            1   111  6.1283 0.01481
##                                                      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT  
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT           
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000               
## No.Worms:PAT                                        *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## The slope of BCI on nbrworms varies upon treatment but methylation does NOT vary with tolerance
mod_Tol.Meth <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
                     data=fullMetadata_OFFS)

## And by treatment instead of No.worms?
mod_Tol.Meth2 <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT*outcome + (1|brotherPairID)+ (1|Sex),
                      data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol.Meth2, reduce.random = F) # Model found: BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                           11 -603.06 1228.1                  
## (1 | brotherPairID)          0   10 -603.06 1226.1   0  1          1
## (1 | Sex)                    0   10 -603.06 1226.1   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                    Eliminated  Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome          1  2156.6  2156.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome              2   494.6   494.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT                  3  1034.5  1034.5
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                      4  2542.3  2542.3
## PAT:outcome                                                 0 30432.9 30432.9
##                                                    NumDF DenDF F value   Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome     1   111  0.7034 0.403442
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome         1   111  0.1603 0.689644
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT             1   111  0.3348 0.564008
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                 1   111  0.8203 0.367046
## PAT:outcome                                            1   111  9.7478 0.002289
##                                                      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome   
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome       
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT           
## res_Nbr_methCpG_Nbr_coveredCpG_div1000               
## PAT:outcome                                        **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome

The slope of BCI on nbr worms varies upon parental treatment, but methylation does NOT vary with tolerance

1.7.2 In exposed offspring only

## By group, tolerance slope as a function of methylation residuals:
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms + (1|brotherPairID) + (1|Sex),
                data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC      LRT Df Pr(>Chisq)
## <none>                            7 -293.63 601.26                       
## (1 | brotherPairID)          0    6 -293.63 599.26 0.000000  1     1.0000
## (1 | Sex)                    0    6 -293.66 599.32 0.067386  1     0.7952
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                 Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms          1  362.1   362.1     1
## No.Worms                                                 2  577.4   577.4     1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                   3 8726.5  8726.5     1
##                                                  DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 49.047  0.1098 0.7418
## No.Worms                                        51.973  0.1776 0.6752
## res_Nbr_methCpG_Nbr_coveredCpG_div1000          37.954  2.7327 0.1066
## 
## Model found:
## BCI ~ (1 | brotherPairID) + (1 | Sex)
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT + (1|brotherPairID) + (1|Sex),
                data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC    LRT Df Pr(>Chisq)
## <none>                            7 -278.14 570.27                     
## (1 | brotherPairID)          0    6 -278.14 568.27 0.0000  1     1.0000
## (1 | Sex)                    0    6 -278.93 569.86 1.5904  1     0.2073
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                            Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT          1    504     504     1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000              2   1476    1476     1
## PAT                                                 0  76908   76908     1
##                                             DenDF F value    Pr(>F)    
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 50.451  0.2624    0.6107    
## res_Nbr_methCpG_Nbr_coveredCpG_div1000     51.749  0.7782    0.3818    
## PAT                                        47.592 41.0412 6.195e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)

1.8 PCA based on all methylation values in G2

# 1. get raw values
percmeth = percMethylation(uniteCov14_G2_woSexAndUnknowChrOVERLAP)

# Run PCA on complete data (CpG covered in all fish)
PCA_allpos <- myPCA(x = t(na.omit(percmeth)), incomplete = FALSE)
## [1] "The chosen model is:"
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55d44397b050>

We perform a PCA on 78384 CpG sites covered in all G2 individuals. We first perform a test on the complete dataset.

1.8.1 PCA plot with associated colors for treatments

fviz_pca_ind(PCA_allpos$res.PCA,  label="none", habillage=PCA_allpos$metadata$trtG1G2) +
  scale_color_manual(values = colOffs)+
  scale_shape_manual(values=c(19,19,19,19))

1.8.2 PCA plot with associated colors for brother pair

fviz_pca_ind(PCA_allpos$res.PCA,  label="none", habillage=as.factor(PCA_allpos$metadata$brotherPairID))

# The function dimdesc() can be used to identify the most correlated variables with a given principal component.
mydimdesc <- dimdesc(PCA_allpos$res.PCA, axes = c(1,2), proba = 0.05)

There are 36212 CpG sites most correlated (p < 0.05) with the first principal component , and 15101 with the second principal component.

The 2 first PCA axes do not explain BCI (p<0.05)

1.8.3 How much of the BCI variance is explained by each variables?

# Percentage of variance explained by each factor:
formula(PCA_allpos$modSel) # BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55d44397b050>
mod_noworms = lmer(BCI ~ PAT + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
mod_noPAT = lmer(BCI ~ No.Worms + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)

# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noworms)[2])*100

B = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
  • 10.75% of the variance in associated with the parasite load (number of worms)
  • 13.38% of the variance in associated with the paternal treatment

1.9 Pretty summary picture

# Set up scatterplot
scatterplot <- ggplot(fullMetadata_OFFS,
                      aes(x = res_Nbr_methCpG_Nbr_coveredCpG,
                          y = BCI, fill=trtG1G2)) +
  geom_point(pch=21, size =3, alpha = .8) +
  guides(color = "none") +
  scale_fill_manual(values = colOffs, name = "Treatment",
                    labels = c("G1 control - G2 control", "G1 control - G2 exposed", "G1 exposed - G2 control", "G1 exposed - G2 exposed")) +
  theme(plot.margin = margin()) + theme_bw() +
  theme(legend.position = "none") +
  xlab("Methylation residuals (methylated sites/coverage")+
  ylab("Body Condition Index")

# Define marginal histogram
marginal_distribution <- function(x, var, group) {
  ggplot(x, aes_string(x = var, fill = group)) +
    # geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
    geom_density(alpha = 0.6, size = 0.2) +
    guides(fill = "none") +
    scale_fill_manual(values = colOffs) +
    theme_void() +
    theme(plot.margin = margin())
}

# Set up marginal histograms
x_hist <- marginal_distribution(fullMetadata_OFFS, "res_Nbr_methCpG_Nbr_coveredCpG", "trtG1G2")
y_hist <- marginal_distribution(fullMetadata_OFFS, "BCI", "trtG1G2") +
  coord_flip()

# Align histograms with scatterplot
aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]

# Arrange plots
cowplot::plot_grid(
  aligned_x_hist, NULL, scatterplot, aligned_y_hist, ncol = 2, nrow = 2, rel_heights = c(0.2, 1), rel_widths = c(1, 0.2)
)

2 Methylation profile

Methylation profiles, CpG present in all fish

2.1 Dendogram of methylations

Methylation profile for the 55530 CpG sites covered in all G1 & G2 fish (N = 135:

makePrettyMethCluster(uniteCovALL_woSexAndUnknowChr, fullMetadata,
                      my.cols.trt=c("#333333ff","#ff0000ff","#ffe680ff","#ff6600ff","#aaccffff","#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)

2.1.1 Offspring:

Methylation profile for the 78384 CpG sites covered in all G2 fish (N = 111:

makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
                      my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)

# Save
pdf(file = "../../dataOut/clusterALLCpG_offspings.pdf", width = 10, height = 4)
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
                      my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)
dev.off()
## png 
##   2

2.2 Adonis tests: impact of different variables on methylation pattern

Permutational multivariate analysis of variance (PERMANOVA) is a non-parametric multivariate statistical test. It is used to compare groups of objects and test the null hypothesis that the centroids and dispersion of the groups as defined by measure space are equivalent for all groups.

# make distance matrix with B-C distances
data.dist = makeDatadistFUN(uniteCovALL_G2_woSexAndUnknowChr)

## Adonis test: importance of each predictor
adonis2(data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
##                                Df SumOfSqs      R2      F Pr(>F)    
## PAT                             1 0.002782 0.01470 1.8292  0.001 ***
## outcome                         1 0.001909 0.01009 1.2552  0.030 *  
## Sex                             1 0.002418 0.01277 1.5895  0.003 ** 
## brotherPairID                   7 0.028018 0.14805 2.6316  0.001 ***
## PAT:outcome                     1 0.001680 0.00888 1.1045  0.127    
## PAT:Sex                         1 0.001492 0.00789 0.9813  0.525    
## outcome:Sex                     1 0.001557 0.00823 1.0240  0.312    
## PAT:brotherPairID               7 0.014344 0.07580 1.3473  0.001 ***
## outcome:brotherPairID           7 0.010591 0.05596 0.9947  0.506    
## Sex:brotherPairID               7 0.010394 0.05492 0.9763  0.652    
## PAT:outcome:Sex                 1 0.001453 0.00768 0.9555  0.603    
## PAT:outcome:brotherPairID       7 0.010351 0.05470 0.9722  0.719    
## PAT:Sex:brotherPairID           7 0.010249 0.05415 0.9626  0.731    
## outcome:Sex:brotherPairID       6 0.008749 0.04623 0.9587  0.742    
## PAT:outcome:Sex:brotherPairID   1 0.001127 0.00596 0.7413  1.000    
## Residual                       54 0.082132 0.43399                  
## Total                         110 0.189247 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results: family of the father (brotherPairID) explains more than 14% of the variance in methylation.

To focus on G1 and G2 treatments, we define the permutation structure considering brother pairs (N = 8), and use a PERMANOVA to test the hypothesis that paternal treatment, offspring treatment and their interactions significantly influencing global methylation.

perm <- how(nperm = 1000) # 1000 permutations
setBlocks(perm) <- with(fullMetadata_OFFS, brotherPairID) # define the permutation structure considering brotherPairID and sex
print(adonis2(data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS, brotherPairID) 
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm)
##                  Df SumOfSqs      R2      F   Pr(>F)    
## PAT               1 0.002782 0.01470 1.6344 0.000999 ***
## outcome           1 0.001909 0.01009 1.1216 0.038961 *  
## Sex               1 0.002418 0.01277 1.4203 0.003996 ** 
## PAT:outcome       1 0.001716 0.00907 1.0080 0.099900 .  
## PAT:Sex           1 0.001740 0.00920 1.0224 0.192807    
## outcome:Sex       1 0.001703 0.00900 1.0004 0.341658    
## PAT:outcome:Sex   1 0.001653 0.00874 0.9712 0.329670    
## Residual        103 0.175326 0.92644                    
## Total           110 0.189247 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • 1.5% of the variation explained by PAT (R2=0.01470, p < 0.001)
  • 1% of the variation explained by outcome (R2=0.01009, p = 0.03)
  • 1.3% of the variation explained by Sex (R2=0.01277, p < 0.01)

2.3 NMDS

#### RUN Goodness of fit
myGOF.NMDS.FUN(dataset = uniteCovALL_G2_woSexAndUnknowChr)

Goodness of fit for NMDS suggested the presence of six dimensions with a stress value > 0.1 and 2 with > 0.2

## to find the seed that allows convergence:
# sapply(3:10, function(x) myNMDS(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = x))
NMDSanalysis <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = 4)

NMDSanalysis$NMDSplot

# Save
pdf(file = "../../dataOut/NMDSplot_allG2.pdf", width = 9, height = 11)
NMDSanalysis$NMDSplot
dev.off()
## png 
##   2

2.4 The methylation pattern is more affected by direct treatment when the father was infected (Adonis test WITHIN both parental trt)

2.4.1 1. Parents NOT infected

AdonisWithinG1trtFUN(trtgp = c(2,3))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,  
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
##               Df SumOfSqs      R2      F   Pr(>F)    
## outcome        1 0.001475 0.01622 1.0073 0.486513    
## Sex            1 0.002094 0.02302 1.4301 0.000999 ***
## brotherPairID  7 0.020026 0.22018 1.9537 0.001998 ** 
## Residual      46 0.067357 0.74058                    
## Total         55 0.090952 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • brother pair explains 22% of the observed variance p < 0.01
  • sex explains 2.3% of the observed variance p < 0.001
  • Offspring trt explains 1.6% of the observed variance (non significant)
NMDSanalysis_G1control <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
                                    metadata = fullMetadata_OFFS, myseed = 25,
                                    byParentTrt=TRUE,
                                    trtgp = c(2,3))

#png(filename = "../../dataOut/NMDSplot_G1fromControlG2.png", width = 900, height = 900)
NMDSanalysis_G1control$NMDSplot

#dev.off()

2.4.2 2. Parents infected

AdonisWithinG1trtFUN(trtgp = c(5,6))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,  
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
##               Df SumOfSqs      R2      F   Pr(>F)   
## outcome        1 0.002160 0.02262 1.4047 0.006993 **
## Sex            1 0.002053 0.02150 1.3349 0.192807   
## brotherPairID  7 0.022076 0.23117 2.0507 0.019980 * 
## Residual      45 0.069205 0.72470                   
## Total         54 0.095494 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • brother pair explains 22% of the observed variance p = 0.01
  • sex explains 2.1% of the observed variance (non significant)
  • Offspring trt explains 2.3% of the observed variance p < 0.01
NMDSanalysis_G1infected <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
                                     metadata = fullMetadata_OFFS, myseed = 25,
                                     byParentTrt=TRUE,
                                     trtgp = c(5,6))

#png(filename = "../../dataOut/NMDSplot_G1fromInfectedG2.png", width = 900, height = 900)
NMDSanalysis_G1infected$NMDSplot

#dev.off()

3 Differential methylations “Bottom up”: DM in offspring

3.1 Description of DMS in the four different comparisons

Differential methylation by brother pair (sex as covariate):

  1. CC-TC = CONTROL fish (parent CvsT)
  2. CT-TT = TREATMENT fish (parent CvsT)
  3. CC-CT = fish from CONTROL parents (G2 CvsT)
  4. TC-TT = fish from TREATMENT parents (G2 CvsT)

3.1.1 Attribute plot of the DMS in every BP for the 4 comparisons

## All DMS are stored in DMSBPlist by brother pair

## Vector of all 4 comparisons
vecCompa <- c("CC_TC", "CT_TT", "CC_CT", "TC_TT")
vecCompaVerbose <- c("Control offspring in control vs infected parents", "Infected offspring in control vs infected parents", "Control vs infected offspring from control parent", "Control vs infected offspring from infected parent") # this is useful in plots

## Extract DMS for all 4 comparisons (by position)
myPosList = lapply(DMSBPlist, lapply, function(x){paste(x$chr, x$end)})

## Prepare df for complexUpset
getUpsetDF = function(i, NBP){ # for a given comparison
  A = get2keep(vecCompa[i], NBP)
  A2 = lapply(A, function(x){
    x = data.frame(x)    # vector of DMS as df
    names(x) = "DMS"    # name each CpG
    return(x)
  })
  ## Add BP name
  for (i in 1:length(names(A2))){
    A2[[i]]["BP"] = names(A2)[i]
  }
  # make a dataframe
  A2 = A2 %>% reduce(full_join, by = "DMS")
  # names column with BP id
  for (i in 2:ncol(A2)) {names(A2)[i] = unique(A2[!is.na(A2[i]), i])}
  # replace by 0 or 1 the DMS absence/presence
  A = data.frame(apply(A2[2:9], 2, function(x) ifelse(is.na(x), 0, 1)))
  # add DMS
  A$DMS = A2$DMS
  return(A)
}

Make upset plots for DMS found in at least 6 BP:

for (i in 1:4){
  df = getUpsetDF(i, NBP = 6)
  print(ComplexUpset::upset(
    df,
    names(df)[1:8],
    width_ratio=0.1,
    themes=upset_default_themes(text=element_text(size=15)),
    sort_intersections_by=c('degree', 'cardinality'),
    queries=query_by_degree(
      df,  names(df)[1:8],
      params_by_degree=list(
        '1'=list(color='red', fill='red'),
        '2'=list(color='purple', fill='purple'),
        '3'=list(color='blue', fill='blue'),
        '4'=list(color='grey', fill='grey'),
        '5'=list(color='red', fill='red'),
        '6'=list(color='purple', fill='purple'),
        '7'=list(color='blue', fill='blue'),
        '8'=list(color='green', fill='green')
      ),
      only_components=c("intersections_matrix", "Intersection size")
    )) + ggtitle(paste0("Differentially methylated sites found in more than six brother pairs in the comparison: \n", vecCompaVerbose[i]))) #+ theme(plot.title = element_text(size = 30)))
}

3.2 Statistical setup

PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons INTERACTION effects: DMS found in CC-CT which show a differential methylation (not necessarily significant) in the opposite direction in TC-TT, or inversely

3.2.1 Identify the different DMS groups

## Subselect those DMS present in at least 4 out of 8 BP
get2keep = function(Compa, NBP = 4){
  x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
  f <- table(unlist((x))) # each DMS present between 1 and 8 times
  tokeep <- names(f)[f >= NBP]
  # print(length(tokeep))
  ## Keep the DMS present in 4 families minimum
  DMSBPlist_INTER4 <- lapply(x, function(x){x[x %in% tokeep]})
  ## Reorder by family:
  DMSBPlist_INTER4 <- DMSBPlist_INTER4[names(DMSBPlist_INTER4)[order(names(DMSBPlist_INTER4))]]
  return(DMSBPlist_INTER4)
}

# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
DMS_PaternalEffect_4BPmin = unique(c(unlist(get2keep(vecCompa[1], NBP = 4)), unlist(get2keep(vecCompa[2], NBP = 4))))

# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
DMS_OffspringEffect_4BPmin = unique(c(unlist(get2keep(vecCompa[3], NBP = 4)), unlist(get2keep(vecCompa[4], NBP = 4))))

# INTERACTION effects: DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed)
setDMSInversionSlope = getInteractionDMS() # in customfunctions.R

# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InversionSlope" = setDMSInversionSlope),
              label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none")

# Save:
pdf(file = "../../dataOut/DMS3groupsVenn.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InversionSlope" = setDMSInversionSlope),
              label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none")
dev.off()
## png 
##   2
# The five cases of Venn diagram:
caseVennG1only = DMS_PaternalEffect_4BPmin[!DMS_PaternalEffect_4BPmin %in% DMS_OffspringEffect_4BPmin]
caseVennG2only = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin & 
                                              !DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
caseVennG1G2NOinter = DMS_OffspringEffect_4BPmin[DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin & 
                                                   !DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
caseVennG1G2inter = DMS_OffspringEffect_4BPmin[DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin & 
                                                 DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope] 
caseVennG2interNOG1 = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin &
                                                   DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]

The size of the different sections of the Venn diagram are as follows:

  • length(caseVennG1only): 1639
  • length(caseVennG2only): 217
  • length(caseVennG1G2NOinter): 134
  • length(caseVennG1G2inter): 190
  • length(caseVennG2interNOG1): 92

Plot the reaction norms with real data as a validation of principle:

plots = makePlotsobservedReactionNorms()
pdf("../../dataOut/caseVennG1only.pdf", width = 15, height = 10)
plots$plot1
dev.off()
## png 
##   2
pdf("../../dataOut/caseVennG2only.pdf", width = 15, height = 10)
plots$plot2
dev.off()
## png 
##   2
pdf("../../dataOut/caseVennG1G2NOinter.pdf", width = 15, height = 10)
plots$plot3
dev.off()
## png 
##   2
pdf("../../dataOut/caseVennG1G2inter.pdf", width = 15, height = 10)
plots$plot4
dev.off()
## png 
##   2
pdf("../../dataOut/caseVennG2interNOG1.pdf", width = 15, height = 10)
plots$plot5
dev.off()
## png 
##   2

3.2.2 Annotate the different DMS groups

## Venn diagram by genes found in each comparisons
# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
DMS_PaternalEffect_4BPmin_GENES = getAnnotationFun(DMSdf = DMS_PaternalEffect_4BPmin,
                         annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)

# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
DMS_OffspringEffect_4BPmin_GENES = getAnnotationFun(DMSdf = DMS_OffspringEffect_4BPmin,
                         annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)

# INTERACTION effects: DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed)
setDMSInversionSlope_GENES  = getAnnotationFun(DMSdf = setDMSInversionSlope,
                         annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)

# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin_GENES$Name, "Offspring effect" = DMS_OffspringEffect_4BPmin_GENES$Name,
                   "InversionSlope" = setDMSInversionSlope_GENES$Name),
              label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none") + ggtitle("Genes in each effect")

# Save:
pdf(file = "../../dataOut/DMS3groupsVenn_GENES.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin_GENES$Name, "Offspring effect" = DMS_OffspringEffect_4BPmin_GENES$Name,
                   "InversionSlope" = setDMSInversionSlope_GENES$Name),
              label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none") + ggtitle("Genes in each effect")
dev.off()
## png 
##   2
# The five cases of Venn diagram:
annot_caseVennG1only = DMS_PaternalEffect_4BPmin_GENES[!DMS_PaternalEffect_4BPmin_GENES$Name %in% DMS_OffspringEffect_4BPmin_GENES$Name,]
annot_caseVennG2only = DMS_OffspringEffect_4BPmin_GENES[!DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name & 
                                              !DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
annot_caseVennG1G2NOinter = DMS_OffspringEffect_4BPmin_GENES[DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name & 
                                                   !DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
annot_caseVennG1G2inter = DMS_OffspringEffect_4BPmin_GENES[DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name & 
                                                 DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,] 
annot_caseVennG2interNOG1 = DMS_OffspringEffect_4BPmin_GENES[!DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name &
                                                   DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]

3.2.3 Save annotated Manhattan plots

namesCases = c("G1", "G2", "G1+G2", "G1:G2", "G2noG1")
casesVec=c("annot_caseVennG1only", "annot_caseVennG2only", "annot_caseVennG1G2NOinter", "annot_caseVennG1G2inter", "annot_caseVennG2interNOG1")

plotAndsave <- function(i){
  P=plotManhattanGenesDMS4BP(annotFile = get(casesVec[i]),
                           GYgynogff = GYgynogff, isBPinfo = FALSE, 
                           myxlab = paste0("Genes linked with the ", namesCases[[i]], " effect"))
  print(P)
  pdf(paste0("../../dataOut/Manhattan", namesCases[[i]], ".pdf"), width = 16, height = 5)
  P
}

annot_caseVennG1G2inter[annot_caseVennG1G2inter$chrom %in% "Gy_chrI",]
##     GeneSymbol            Name   seqid source type    start      end strand
## 16      OR11A1 gasAcul06751-RA Gy_chrI  maker mRNA   163569   164495      +
## 133      VEGFC gasAcul07707-RA Gy_chrI  maker mRNA 25390253 25395728      +
## 174      BARX2 gasAcul06935-RA Gy_chrI  maker mRNA  3714027  3722880      -
## 185     SLC8A2 gasAcul07399-RA Gy_chrI  maker mRNA 15814742 15835770      +
## 215     PARD3B gasAcul07626-RA Gy_chrI  maker mRNA 22467146 22482347      -
## 222      FGF14 gasAcul07676-RA Gy_chrI  maker mRNA 24734549 24751466      +
## 252    GRAMD1B gasAcul06941-RA Gy_chrI  maker mRNA  3901325  3948148      +
##                  ID                                           Alias
## 16  gasAcul06751-RA              maker-Gy_chrI-snap-gene-0.8-mRNA-1
## 133 gasAcul07707-RA snap_masked-Gy_chrI-processed-gene-84.27-mRNA-1
## 174 gasAcul06935-RA             maker-Gy_chrI-snap-gene-12.5-mRNA-1
## 185 gasAcul07399-RA            maker-Gy_chrI-snap-gene-52.26-mRNA-1
## 215 gasAcul07626-RA         maker-Gy_chrI-augustus-gene-75.6-mRNA-1
## 222 gasAcul07676-RA            maker-Gy_chrI-snap-gene-82.58-mRNA-1
## 252 gasAcul06941-RA  snap_masked-Gy_chrI-processed-gene-13.2-mRNA-1
##                                                                              Note
## 16              Similar to OR11A1: Olfactory receptor 11A1 (Homo sapiens OX=9606)
## 133 Similar to VEGFC: Vascular endothelial growth factor C (Homo sapiens OX=9606)
## 174        Similar to Barx2: Homeobox protein BarH-like 2 (Mus musculus OX=10090)
## 185         Similar to Slc8a2: Sodium/calcium exchanger 2 (Mus musculus OX=10090)
## 215 Similar to Pard3b: Partitioning defective 3 homolog B (Mus musculus OX=10090)
## 222    Similar to Fgf14: Fibroblast growth factor 14 (Rattus norvegicus OX=10116)
## 252                   Similar to Gramd1b: Protein Aster-B (Mus musculus OX=10090)
##                                                                                                   Dbxref
## 16                                                                      InterPro:IPR000725, Pfam:PF13853
## 133                                                                                                     
## 174                                                                     InterPro:IPR001356, Pfam:PF00046
## 185 InterPro:IPR003644, InterPro:IPR004837, InterPro:IPR032452, Pfam:PF01699, Pfam:PF03160, Pfam:PF16494
## 215                                                                     InterPro:IPR001478, Pfam:PF00595
## 222                                                                     InterPro:IPR002209, Pfam:PF00167
## 252                                                                                                     
##                          Ontology_term       Parent X_AED
## 16  GO:0004984, GO:0007186, GO:0016021 gasAcul06751  0.11
## 133                                    gasAcul07707  0.19
## 174                         GO:0003677 gasAcul06935  0.19
## 185 GO:0007154, GO:0016021, GO:0055085 gasAcul07399  0.11
## 215                         GO:0005515 gasAcul07626  0.22
## 222                         GO:0008083 gasAcul07676  0.20
## 252                                    gasAcul06941  0.95
##                                 X_QI X_eAED nCpG geneLengthkb nCpGperGenekb
## 16               0|0|0|1|1|1|2|0|248   0.11    1        0.926          1.08
## 133           0|0|0|0.75|1|1|8|0|316   0.11    1        5.475          0.18
## 174 0|0.33|0.5|1|0.66|0.75|4|886|254   0.23    1        8.853          0.11
## 185       0|0|0|1|0.58|0.61|13|0|828   0.12    2       21.028          0.10
## 215     0|0|0|0.87|0.42|0.62|8|0|521   0.22    1       15.201          0.07
## 222        0|0|0|1|0.33|0.75|4|0|174   0.20    1       16.917          0.06
## 252          0|0|0|0.06|1|1|16|0|690   0.95    2       46.823          0.04
##       chrom ENTREZID                                       description
## 16  Gy_chrI    26531 olfactory receptor family 11 subfamily A member 1
## 133 Gy_chrI     7424              vascular endothelial growth factor C
## 174 Gy_chrI     8538                                   BARX homeobox 2
## 185 Gy_chrI     6543                 solute carrier family 8 member A2
## 215 Gy_chrI   117583         par-3 family cell polarity regulator beta
## 222 Gy_chrI     2259                       fibroblast growth factor 14
## 252 Gy_chrI    57476                         GRAM domain containing 1B
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            summary
## 16  Olfactory receptors interact with odorant molecules in the nose, to initiate a neuronal response that triggers the perception of a smell. The olfactory receptor proteins are members of a large family of G-protein-coupled receptors (GPCR) arising from single coding-exon genes. Olfactory receptors share a 7-transmembrane domain structure with many neurotransmitter and hormone receptors and are responsible for the recognition and G protein-mediated transduction of odorant signals. The olfactory receptor gene family is the largest in the genome. The nomenclature assigned to the olfactory receptor genes and proteins for this organism is independent of other organisms. [provided by RefSeq, Jul 2008]
## 133                                                                                                                                                                                                                                                                                                 The protein encoded by this gene is a member of the platelet-derived growth factor/vascular endothelial growth factor (PDGF/VEGF) family. The encoded protein promotes angiogenesis and endothelial cell growth, and can also affect the permeability of blood vessels. The proprotein is further cleaved into a fully processed form that can bind and activate VEGFR-2 and VEGFR-3 receptors. [provided by RefSeq, Apr 2014]
## 174                                                                                                                                                                                                                                                                                                                                                             This gene encodes a member of the homeobox transcription factor family. A highly related protein in mouse has been shown to influence cellular processes that control cell adhesion and remodeling of the actin cytoskeleton in myoblast fusion and chondrogenesis. The encoded protein may also play a role in cancer progression. [provided by RefSeq, Jul 2008]
## 185                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
## 215                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
## 222                                                                                                                                                                                                       The protein encoded by this gene is a member of the fibroblast growth factor (FGF) family. FGF family members possess broad mitogenic and cell survival activities, and are involved in a variety of biological processes, including embryonic development, cell growth, morphogenesis, tissue repair, tumor growth and invasion. A mutation in this gene is associated with autosomal dominant cerebral ataxia. Alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Jul 2008]
## 252
plotAndsave(1);  dev.off()

## png 
##   2
plotAndsave(2);  dev.off()

## png 
##   2
plotAndsave(3);  dev.off()

## png 
##   2
plotAndsave(4);  dev.off()

## png 
##   2
plotAndsave(5);  dev.off()

## png 
##   2
rm(casesVec,namesCases, plotAndsave)

3.2.4 Gene Ontology analysis:

  • Gene universe: all genes which are present in the dataset
  • Gene sub-universe: all genes in DMS in the GOterm universe we only want genes with (1) DMS between G2 exposed to parasites, by brother pair, present in AT LEAST 4 BP/8 for which (2) CpGs were covered, and (3) which have a GOterm attributed
# create gene universe
gene_universe <- data.frame(
  subsetByOverlaps(GRanges(annotGff3), GRanges(uniteCov14_G2_woSexAndUnknowChrOVERLAP))) %>% # subselect covered CpGs
  filter(lengths(Ontology_term)!=0) %>% # rm non existing GO terms
  filter(type %in% "gene")  %>% # keep all the 7416 genes with GO terms
  dplyr::select(c("Name", "Ontology_term")) %>%
  mutate(go_linkage_type = "IEA") %>% #NB: IEA but not necessarily true, it's from Interproscan after Maker. Sticklebacks (biomart) have 82701 IEA and 63 ISS.
  relocate("Ontology_term","go_linkage_type","Name") %>%
  unnest(Ontology_term) %>% # one GO per line (was a list before in this column)
  data.frame()

# Create gene set collection
goFrame <- GOFrame(gene_universe, organism="Gasterosteus aculeatus")
goAllFrame <- GOAllFrame(goFrame)
gsc_universe <- GeneSetCollection(goAllFrame, setType = GOCollection())

IMPORTANT NOTE from Mel: why conditional hypergeometric test? The GO ontology is set up as a directed acyclic graph, where a parent term is comprised of all its child terms. If you do a standard hypergeometric, you might e.g., find ‘positive regulation of kinase activity’ to be significant. If you then test ‘positive regulation of catalytic activity’, which is a parent term, then it might be significant as well, but only because of the terms coming from positive regulation of kinase activity.

The conditional hypergeometric takes this into account, and only uses those terms that were not already significant when testing a higher order (parent) term.

GO_G1only = makedfGO(annot_caseVennG1only, gene_universe, effect = "G1only")
GO_G2only = makedfGO(annot_caseVennG2only, gene_universe, effect = "G2only")
GO_G1G2NOinter = makedfGO(annot_caseVennG1G2NOinter, gene_universe, effect = "G1G2NOinter")
GO_G1G2inter = makedfGO(annot_caseVennG1G2inter, gene_universe, effect = "G1G2inter")
GO_G2interNOG1 = makedfGO(annot_caseVennG2interNOG1, gene_universe, effect = "G2interNOG1")

dfGO = rbind(GO_G1only, GO_G2only, GO_G1G2NOinter, GO_G1G2inter, GO_G2interNOG1)

3.2.5 GO plot

dfGO %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
  geom_point(aes(color = p.value.adjusted, size = genePercent)) +
  scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
  scale_size_continuous(name = "% of genes")+
  theme_bw() + ylab("") + xlab("Treatments comparison") +
  theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
  facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")

3.3 Correlation between methylation and phenotype (nbr worms, BCI)

Approach:

  1. Extract methylation values
  • raw beta values at DMS shared by >4 (or more) BP or
  • derive difference between individual and mean of the group
  1. PCA

  2. extract axes 1 & 2

  3. Correlation with parasite load/BCI

source("R05_PCAatDMS.R")
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA2:PAT + No.Worms:PAT
## <environment: 0x556480fd7570>
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA2:PAT + No.Worms:PAT
## <environment: 0x5564483f8240>
## [1] "The chosen model is:"
## BCI ~ PCA1 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA1:PAT + No.Worms:PAT
## <environment: 0x55645e6eeca8>

4 Differential methylations “Top down”: DM in parents -> how do they look in offspring?

4.0.1 TREATMENT EFFECT: Differential methylation between control and infected, within each parental treatment

## Features Annotation (use package genomation v1.24.0)
## NB Promoters are defined by options at genomation::readTranscriptFeatures function.
## The default option is to take -1000,+1000bp around the TSS and you can change that.
## -> following Heckwolf 2020 and Sagonas 2020, we consider 1500bp upstream and 500 bp downstream

## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
par(mfrow=c(1,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
diffAnn_PAR
##   promoter       exon     intron intergenic 
##       8.58      15.84      34.13      45.72 
##   promoter       exon     intron intergenic 
##       8.58      13.19      32.51      45.72 
## promoter     exon   intron 
##     1.07     0.19     0.44 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3    3020    8128   15141   19226  300247
genomation::plotTargetAnnotation(diffAnn_PAR,precedence=TRUE, main="DMS G1", cex.legend = 1, border="white")

## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
diffAnn_G2_controlG1
##   promoter       exon     intron intergenic 
##      11.28      21.05      30.16      44.44 
##   promoter       exon     intron intergenic 
##      11.28      16.21      28.07      44.44 
## promoter     exon   intron 
##     0.38     0.07     0.14 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      11    2602    6295   14212   15906  261017
genomation::plotTargetAnnotation(diffAnn_G2_controlG1,precedence=TRUE, main="DMS G2-G1c", cex.legend = 1, border="white")
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
diffAnn_G2_infectedG1
##   promoter       exon     intron intergenic 
##      12.90      13.62      34.64      46.81 
##   promoter       exon     intron intergenic 
##      12.90       9.42      30.87      46.81 
## promoter     exon   intron 
##     0.28     0.03     0.08 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       9    2355    7150   12285   14773  109527
genomation::plotTargetAnnotation(diffAnn_G2_infectedG1,precedence=TRUE, main="DMS G2-G1i", cex.legend = 1, border="white")

par(mfrow=c(1,1))
## Run the function to get DMS info
DMS_info_G1 <- myDMSinfo(DMSlist$DMS_15pc_G1_C_T, uniteCov6_G1_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1c_final <- myDMSinfo(DMSlist$DMS_15pc_CC_CT, uniteCov14_G2_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1i_final <- myDMSinfo(DMSlist$DMS_15pc_TC_TT,uniteCov14_G2_woSexAndUnknowChrOVERLAP)

NB Kostas’ results: “We found a total of 1,973 CpG sites out of 1,172,887 CpGs (0.17%) across the genome that showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish”

Here: we obtain out a total of 1001880 CpG sites: 3648 (0.36%) showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish in the parental group; 1197 (0.12%) in the offspring from control parents comparison; 690 (0.07%) in the offspring from infected parents comparison.

4.0.1.1 Intersections of DMS: Venn diagrams

## Chi2 test: are the number of DMS from G2-G1C and G2-G1I overlapping with DMSpar statistically different?

A=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1c_final$DMS))
B=length(DMS_info_G2_G1c_final$DMS)
C=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1i_final$DMS))
D=length(DMS_info_G2_G1i_final$DMS)

Observed=matrix(c(A, B-A, C, D-C),nrow=2)
Observed
##      [,1] [,2]
## [1,]  106   59
## [2,] 1091  631
chisq.test(Observed)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Observed
## X-squared = 0.019909, df = 1, p-value = 0.8878
## not statistically different
## output Venn diagrams
allVenn <- ggVennDiagram(list("DMS G1" = DMS_info_G1$DMS, "DMS G2-c" = DMS_info_G2_G1c_final$DMS, "DMS G2-i" = DMS_info_G2_G1i_final$DMS), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")
hypoVenn <- ggVennDiagram(list("DMS G1\nhypo" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hypo"],
                               "DMS G2-c\nhypo" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hypo"],
                               "DMS G2-i\nhypo" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hypo"]), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")
hyperVenn <- ggVennDiagram(list("DMS G1\nhyper" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hyper"],
                                "DMS G2-c\nhyper" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hyper"],
                                "DMS G2-i\nhyper" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hyper"]), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")

ggarrange(allVenn,
          ggarrange(hypoVenn, hyperVenn, ncol = 2, legend = "none"),
          nrow = 2, widths = c(.5,1))

4.0.1.1.1 Venn with annotated features
runHyperHypoAnnot <- function(){
  par(mfrow=c(2,3))
  par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
  ####### HYPO
  ## Parents comparison:
  A = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(A,precedence=TRUE, main="DMS G1\nhypo",
                                   cex.legend = .4, border="white")
  ## Offspring from control parents comparison:
  B = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(B,precedence=TRUE, main="DMS G2-G1c\nhypo",
                                   cex.legend = .4, border="white")
  ## Offspring from infected parents comparison:
  C = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(C,precedence=TRUE, main="DMS G2-G1i\nhypo",
                                   cex.legend = .4, border="white")
  ####### HYPER
  ## Parents comparison:
  D = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(D,precedence=TRUE, main="DMS G1\nhyper",
                                   cex.legend = .4, border="white")
  ## Offspring from control parents comparison:
  E = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(E,precedence=TRUE, main="DMS G2-G1c\nhyper",
                                   cex.legend = .4, border="white")
  ## Offspring from infected parents comparison:
  f = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(f,precedence=TRUE, main="DMS G2-G1i\nhyper",
                                   cex.legend = .4, border="white")
  par(mfrow=c(1,1))
  return(list(G1hypo=A, G2G1chypo=B, G2G1ihypo=C, G1hyper=D, G2G1chyper=E, G2G1ihyper=f))
}

myannot=runHyperHypoAnnot()

############################################################
## Venn diagram of overlapping features by their annotation:
table(rowSums(as.data.frame(myannot$G1hypo@members))) # NB: some positions are labelled with several features!
## 
##   0   1   2   3 
## 559 566  49   2
## as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"

myAnnotateDMS <- function(DMS, annot){
  ## sanity check
  if (nrow(DMS) != nrow(annot)){"STOP error in arguments"}
  DMS$pos <- paste(DMS$chr, DMS$start, DMS$end)
  ## NB as in MBE 2021: "giving precedence to the following order promoters, exons,
  ## introns, and intergenic regions when features overlapped"
  DMS$feature <- NA
  ## 1. promoters
  DMS$feature[which(annot$prom == 1)] = "promoter"
  ## 2. exons
  DMS$feature[which(annot$exon == 1 & annot$prom ==0)] = "exon"
  ## 3. intron
  DMS$feature[which(annot$intro == 1 & annot$exon == 0 & annot$prom ==0)] = "intron"
  ## 4. intergenic regions
  DMS$feature[which(annot$intro == 0 & annot$exon == 0 & annot$prom ==0)] = "intergenic"
  return(DMS)
}

DMSlist$DMS_15pc_G1_C_T = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T, as.data.frame(diffAnn_PAR@members))
DMSlist$DMS_15pc_G1_C_T_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],
                                             as.data.frame(myannot$G1hypo@members))
DMSlist$DMS_15pc_G1_C_T_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],
                                              as.data.frame(myannot$G1hyper@members))

DMSlist$DMS_15pc_CC_CT = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT, as.data.frame(diffAnn_G2_controlG1@members))
DMSlist$DMS_15pc_CC_CT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],
                                            as.data.frame(myannot$G2G1chypo@members))
DMSlist$DMS_15pc_CC_CT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],
                                             as.data.frame(myannot$G2G1chyper@members))

DMSlist$DMS_15pc_TC_TT = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT, as.data.frame(diffAnn_G2_infectedG1@members))
DMSlist$DMS_15pc_TC_TT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],
                                            as.data.frame(myannot$G2G1ihypo@members))
DMSlist$DMS_15pc_TC_TT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],
                                             as.data.frame(myannot$G2G1ihyper@members))

## Make Venn diagram for each feature
getFeatureDFHYPO <- function(myfeat){
  a = DMSlist$DMS_15pc_G1_C_T_HYPO$pos[DMSlist$DMS_15pc_G1_C_T_HYPO$feature %in% myfeat]
  b = DMSlist$DMS_15pc_CC_CT_HYPO$pos[DMSlist$DMS_15pc_CC_CT_HYPO$feature %in% myfeat]
  c = DMSlist$DMS_15pc_TC_TT_HYPO$pos[DMSlist$DMS_15pc_TC_TT_HYPO$feature %in% myfeat]
  return(list(a=a,b=b,c=c))
}

getFeatureDFHYPER <- function(myfeat){
  a = DMSlist$DMS_15pc_G1_C_T_HYPER$pos[DMSlist$DMS_15pc_G1_C_T_HYPER$feature %in% myfeat]
  b = DMSlist$DMS_15pc_CC_CT_HYPER$pos[DMSlist$DMS_15pc_CC_CT_HYPER$feature %in% myfeat]
  c = DMSlist$DMS_15pc_TC_TT_HYPER$pos[DMSlist$DMS_15pc_TC_TT_HYPER$feature %in% myfeat]
  return(list(a=a,b=b,c=c))
}

getVenn <- function(feat, direction){
  if (direction == "hypo"){
    ggVennDiagram(list(A = getFeatureDFHYPO(feat)[["a"]],
                       B = getFeatureDFHYPO(feat)[["b"]],
                       C = getFeatureDFHYPO(feat)[["c"]]), label_alpha = 0,
                  category.names = c(paste0("DMS G1\nhypo\n", feat), paste0("DMS G2-c\nhypo\n", feat), paste0("DMS G2-i\nhypo\n", feat))) +
      scale_fill_gradient(low="white",high = "red")
  } else if (direction == "hyper"){
    ggVennDiagram(list(A = getFeatureDFHYPER(feat)[["a"]],
                       B = getFeatureDFHYPER(feat)[["b"]],
                       C = getFeatureDFHYPER(feat)[["c"]]), label_alpha = 0,
                  category.names = c(paste0("DMS G1\nhyper\n", feat), paste0("DMS G2-c\nhyper\n", feat), paste0("DMS G2-i\nhyper\n", feat))) +
      scale_fill_gradient(low="white",high = "red")
  }
}
ggarrange(getVenn("promoter", "hypo"), getVenn("exon", "hypo"),
          getVenn("intron", "hypo"), getVenn("intergenic", "hypo"),
          nrow = 2, ncol = 2)

ggarrange(getVenn("promoter", "hyper"), getVenn("exon", "hyper"),
          getVenn("intron", "hyper"), getVenn("intergenic", "hyper"),
          nrow = 2, ncol = 2)

4.0.1.2 Manhattan plot of DMS

## Parents trt-ctrl
# load annotation
annot_PAR <- as.data.frame(diffAnn_PAR@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T, annotFile = annot_PAR, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")

## G2-G1c trt-ctrl
# load annotation
annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT, annotFile = annot_G2_G1c, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")

## G2-G1i trt-ctrl
# load annotation
annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT, annotFile = annot_G2_G1i, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")

## Outliers in Manhattan plot: 15% diff + 2SD
outliers_G1_final <- which(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff)))
outliers_annot_G1 <- as.data.frame(diffAnn_PAR@members)[outliers_G1_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T[outliers_G1_final, ],
                   annotFile = outliers_annot_G1, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")

outliers_G2_G1c_final <- which(abs(DMSlist$DMS_15pc_CC_CT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_CC_CT$meth.diff)))
outliers_annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)[outliers_G2_G1c_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT[outliers_G2_G1c_final, ],
                   annotFile = outliers_annot_G2_G1c, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")

outliers_G2_G1i_final <- which(abs(DMSlist$DMS_15pc_TC_TT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_TC_TT$meth.diff)))
outliers_annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)[outliers_G2_G1i_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT[outliers_G2_G1i_final, ],
                   annotFile = outliers_annot_G2_G1i, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")

4.1 I. Focus on CpG positions at parental (Ctrl-Inf) DMS

Question: how are the beta values in the different groups at the parental DMS?

##############
## Prepare dataset
PM_G1 <- getPMdataset(uniteCov = uniteCov6_G1_woSexAndUnknowChrOVERLAP, MD = fullMetadata_PAR, gener="parents")
PM_G2 <- getPMdataset(uniteCov = uniteCov14_G2_woSexAndUnknowChrOVERLAP, MD = fullMetadata_OFFS, gener="offspring")

4.1.1 What is the relative contribution of methylation to brother pair & paternal treatment?

Test of VCA: variance component analysis https://cran.r-project.org/web/packages/VCA/vignettes/VCA_package_vignette.html

4.1.1.1 Hypo methylation

PM_G2_mean_hypo <- PM_G2[PM_G2$hypohyper %in% "hypo", ] %>%
  group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
  dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()

varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo,
        MeanLine=list(var=c("G1_trt", "G2_trt"),
                      col=c("white", "blue"), lwd=c(2,2)),
        BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
        YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypomethylated upon infection"))

myfitVCA_hypo <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo)

### Real values
trtEffect <- sum(myfitVCA_hypo$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hypo$aov.tab[5:8, 5])
error <- sum(myfitVCA_hypo$aov.tab[9, 5])
realValHypoVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)

### Randomisation
myrandomVCA <- function(df=PM_G2_mean_hypo){
  randomDF = df
  randomDF$G1_trt = sample(PM_G2_mean_hypo$G1_trt, replace = F)
  randomDF$G2_trt = sample(PM_G2_mean_hypo$G2_trt, replace = F)
  randomDF$brotherPairID = sample(PM_G2_mean_hypo$brotherPairID, replace = F)
  myfitVCA <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = randomDF)
  trtEffect <- sum(myfitVCA$aov.tab[2:4, 5])
  genEffect <- sum(myfitVCA$aov.tab[5:8, 5])
  error <- sum(myfitVCA$aov.tab[9, 5])
  return(data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error))
}

# randomHypoVCA = do.call(rbind, lapply(1:1000, function(x) {
#   df=myrandomVCA(PM_G2_mean_hypo)
#   df$rep=x
#   return(df)}))

# randomHypoVCA = melt(randomHypoVCA, id.vars = "rep")
# saveRDS(randomHypoVCA, file = "Rdata/randomHypoVCA.RDS")
randomHypoVCA <- readRDS(file = "Rdata/randomHypoVCA.RDS")
df2=reshape2::melt(realValHypoVCA)

sumDF <- randomHypoVCA %>%
  group_by(variable) %>%
  dplyr::summarize(value = mean(value)) %>% data.frame()

ggplot(randomHypoVCA, aes(x=variable, y=value))+
  geom_boxplot()+
  geom_jitter(width=.1, alpha=.2)+
  geom_point(data = df2, col = "red", size = 6)+
  geom_text(data=sumDF, aes(label=round(value)), col="white")+
  geom_text(data = df2, aes(label=round(value)), col="white")+
  theme_cleveland()+
  ggtitle("VCA with bootstrap N=1000 at hypo-parDMS", subtitle = "red: observed values")

# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
## 
## 
## 
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
## 
## > VCA Result:
## -------------
## 
##   Name                        DF     SS       MS       VC      %Total  SD    
## 1 total                       6.6733                   15.8572 100     3.9821
## 2 G1_trt                      1      319.6035 319.6035 4.7515  29.9646 2.1798
## 3 G2_trt                      1      3.8987   3.8987   0.0712  0.4491  0.2669
## 4 G1_trt:G2_trt               1      1.5539   1.5539   0*      0*      0*    
## 5 brotherPairID               7      129.185  18.455   0*      0*      0*    
## 6 G1_trt:brotherPairID        7      395.9735 56.5676  7.8576  49.5525 2.8031
## 7 G2_trt:brotherPairID        7      9.9214   1.4173   0*      0*      0*    
## 8 G1_trt:G2_trt:brotherPairID 7      20.2165  2.8881   0*      0*      0*    
## 9 error                       79     250.9663 3.1768   3.1768  20.0337 1.7824
##   CV[%]  Var(VC)
## 1 6.6047        
## 2 3.6154 66.6443
## 3 0.4426 0.0124 
## 4 0*     0.0096 
## 5 0*     5.4751 
## 6 4.6493 19.7869
## 7 0*     0.068  
## 8 0*     0.2383 
## 9 2.9562 0.2555 
## 
## Mean: 60.2922 (N = 111) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA | * VC set to 0 | adapted MS used for total DF
## 
## 
## > VC:
## -----
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       15.8572  6.824  68.824  7.787         53.1883      
## G1_trt                      4.7515   0*     20.7519 0*            18.1795      
## G2_trt                      0.0712   0*     0.2897  0*            0.2545       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        7.8576   0*     16.576  0.5409        15.1744      
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       3.1768   2.3794 4.457   2.491         4.2163       
## 
## > SD:
## -----
##                             Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total                       3.9821   2.6123 8.296  2.7905        7.293        
## G1_trt                      2.1798   0*     4.5554 0*            4.2637       
## G2_trt                      0.2669   0*     0.5382 0*            0.5045       
## G1_trt:G2_trt               0                                                 
## brotherPairID               0                                                 
## G1_trt:brotherPairID        2.8031   0*     4.0714 0.7355        3.8954       
## G2_trt:brotherPairID        0                                                 
## G1_trt:G2_trt:brotherPairID 0                                                 
## error                       1.7824   1.5425 2.1112 1.5783        2.0534       
## 
## > CV[%]:
## --------
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       6.6047   4.3327 13.7597 4.6283        12.0961      
## G1_trt                      3.6154   0*     7.5556  0*            7.0718       
## G2_trt                      0.4426   0*     0.8926  0*            0.8368       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        4.6493   0*     6.7527  1.2199        6.4609       
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       2.9562   2.5584 3.5015  2.6177        3.4057       
## 
## 
## 95% Confidence Level  |  CIs for negative VCs excluded  | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs

4.1.1.2 Hyper methylation

PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
  group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
  dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()

varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
        MeanLine=list(var=c("G1_trt", "G2_trt"),
                      col=c("white", "blue"), lwd=c(2,2)),
        BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
        YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))

myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)

### Real values
trtEffect <- sum(myfitVCA_hyper$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hyper$aov.tab[5:8, 5])
error <- sum(myfitVCA_hyper$aov.tab[9, 5])
realValHyperVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)

### Randomisation
# randomHyperVCA = do.call(rbind, lapply(1:1000, function(x) {
#   df=myrandomVCA(PM_G2_mean_hyper)
#   df$rep=x
#   return(df)}))
#
# randomHyperVCA = melt(randomHyperVCA, id.vars = "rep")
# saveRDS(randomHyperVCA, file = "Rdata/randomHyperVCA.RDS")
randomHyperVCA <- readRDS(file = "Rdata/randomHyperVCA.RDS")
df2=reshape2::melt(realValHyperVCA)

sumDF <- randomHyperVCA %>%
  group_by(variable) %>%
  dplyr::summarize(value = mean(value)) %>% data.frame()

ggplot(randomHyperVCA, aes(x=variable, y=value))+
  geom_boxplot()+
  geom_jitter(width=.1, alpha=.2)+
  geom_point(data = df2, col = "red", size = 6)+
  geom_text(data=sumDF, aes(label=round(value)), col="white")+
  geom_text(data = df2, aes(label=round(value)), col="white")+
  theme_cleveland()+
  ggtitle("VCA with bootstrap N=1000 at hyper-parDMS", subtitle = "red: observed values")

# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
## 
## 
## 
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
## 
## > VCA Result:
## -------------
## 
##   Name                        DF     SS       MS       VC      %Total  SD    
## 1 total                       6.6733                   15.8572 100     3.9821
## 2 G1_trt                      1      319.6035 319.6035 4.7515  29.9646 2.1798
## 3 G2_trt                      1      3.8987   3.8987   0.0712  0.4491  0.2669
## 4 G1_trt:G2_trt               1      1.5539   1.5539   0*      0*      0*    
## 5 brotherPairID               7      129.185  18.455   0*      0*      0*    
## 6 G1_trt:brotherPairID        7      395.9735 56.5676  7.8576  49.5525 2.8031
## 7 G2_trt:brotherPairID        7      9.9214   1.4173   0*      0*      0*    
## 8 G1_trt:G2_trt:brotherPairID 7      20.2165  2.8881   0*      0*      0*    
## 9 error                       79     250.9663 3.1768   3.1768  20.0337 1.7824
##   CV[%]  Var(VC)
## 1 6.6047        
## 2 3.6154 66.6443
## 3 0.4426 0.0124 
## 4 0*     0.0096 
## 5 0*     5.4751 
## 6 4.6493 19.7869
## 7 0*     0.068  
## 8 0*     0.2383 
## 9 2.9562 0.2555 
## 
## Mean: 60.2922 (N = 111) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA | * VC set to 0 | adapted MS used for total DF
## 
## 
## > VC:
## -----
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       15.8572  6.824  68.824  7.787         53.1883      
## G1_trt                      4.7515   0*     20.7519 0*            18.1795      
## G2_trt                      0.0712   0*     0.2897  0*            0.2545       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        7.8576   0*     16.576  0.5409        15.1744      
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       3.1768   2.3794 4.457   2.491         4.2163       
## 
## > SD:
## -----
##                             Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total                       3.9821   2.6123 8.296  2.7905        7.293        
## G1_trt                      2.1798   0*     4.5554 0*            4.2637       
## G2_trt                      0.2669   0*     0.5382 0*            0.5045       
## G1_trt:G2_trt               0                                                 
## brotherPairID               0                                                 
## G1_trt:brotherPairID        2.8031   0*     4.0714 0.7355        3.8954       
## G2_trt:brotherPairID        0                                                 
## G1_trt:G2_trt:brotherPairID 0                                                 
## error                       1.7824   1.5425 2.1112 1.5783        2.0534       
## 
## > CV[%]:
## --------
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       6.6047   4.3327 13.7597 4.6283        12.0961      
## G1_trt                      3.6154   0*     7.5556  0*            7.0718       
## G2_trt                      0.4426   0*     0.8926  0*            0.8368       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        4.6493   0*     6.7527  1.2199        6.4609       
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       2.9562   2.5584 3.5015  2.6177        3.4057       
## 
## 
## 95% Confidence Level  |  CIs for negative VCs excluded  | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs

4.2 DMS values in parents

parmod <- lmer(data = PM_G1, BetaValue ~ meth.diff.parentals : Treatment + (1|CpGSite) + (1|brotherPairID))

## check normality of residuals assumption
qqnorm(resid(parmod))
qqline(resid(parmod))

pred <- ggpredict(parmod, terms = c("meth.diff.parentals", "Treatment"))
plot(pred, add.data = T)+
  scale_color_manual(values = c("black", "red"))+
  scale_y_continuous(name = "Beta values")+
  scale_x_continuous(name = "Methylation difference between infected and control parents in percentage")+
  ggtitle("Predicted methylation ratio (Beta) values in parents\n as a function of differential methylation between exposed and control groups")+
  theme_bw()

4.2.1 Linear model: does the beta value of offspring at DMS depends on treatment Parent x Offspring?

modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2, REML = F) # REML =F for model comparison
mod_noG1trt <- lmer(BetaValue ~ G2_trt:hypohyper + (1|CpGSite)+ (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noG2trt <-lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noInteractions <- lmer(BetaValue ~ (G1_trt + G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noHypoHyper <- lmer(BetaValue ~ (G1_trt * G2_trt) + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)

## check normality of residuals assumption
qqnorm(resid(modFull))
qqline(resid(modFull))

## Likelihood ratio tests for all variables:
lrtest(modFull, mod_noG1trt) # G1 trt is VERY VERY significant (LRT: χ² (4) = 1163.6, p < 0.001)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G2_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1516301 -4 1163.6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noG2trt) # G2 trt is VERY VERY significant (LRT: χ² (4) = 30.02, p < 0.001) NB that changed when brotherpair is used instead of family!
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G1_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1515734 -4 30.022  4.844e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noInteractions) # interactions are significant (LRT: χ² (2) = 9.21, p < 0.01)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt + G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
##   #Df   LogLik Df Chisq Pr(>Chisq)   
## 1  12 -1515719                       
## 2  10 -1515724 -2 9.211   0.009997 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noHypoHyper) # hypo/hyper VERY VERY significant (LRT: χ² (4) = 1140, p < 0.001)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt * G2_trt) + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1516289 -4 1140.4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Post-hoc tests between treatments
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2)
modFull_emmeans <- emmeans(modFull, list(pairwise ~ (G1_trt:G2_trt):hypohyper), adjust = "tukey")
modFull_emmeans
## $`emmeans of G1_trt, G2_trt, hypohyper`
##  G1_trt  G2_trt  hypohyper emmean    SE  df asymp.LCL asymp.UCL
##  Control Control hypo        62.1 0.960 Inf      60.2      64.0
##  Exposed Control hypo        58.8 0.959 Inf      57.0      60.7
##  Control Exposed hypo        62.3 0.960 Inf      60.4      64.2
##  Exposed Exposed hypo        58.9 0.960 Inf      57.1      60.8
##  Control Control hyper       56.6 0.872 Inf      54.9      58.3
##  Exposed Control hyper       58.7 0.872 Inf      57.0      60.4
##  Control Exposed hyper       55.8 0.872 Inf      54.1      57.6
##  Exposed Exposed hyper       58.6 0.872 Inf      56.9      60.3
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $`pairwise differences of G1_trt, G2_trt, hypohyper`
##  contrast                                      estimate    SE  df z.ratio
##  Control Control hypo - Exposed Control hypo     3.2670 0.199 Inf  16.439
##  Control Control hypo - Control Exposed hypo    -0.2024 0.205 Inf  -0.989
##  Control Control hypo - Exposed Exposed hypo     3.1756 0.202 Inf  15.735
##  Control Control hypo - Control Control hyper    5.5292 0.672 Inf   8.226
##  Control Control hypo - Exposed Control hyper    3.3925 0.672 Inf   5.049
##  Control Control hypo - Control Exposed hyper    6.2691 0.673 Inf   9.317
##  Control Control hypo - Exposed Exposed hyper    3.5420 0.672 Inf   5.268
##  Exposed Control hypo - Control Exposed hypo    -3.4694 0.202 Inf -17.160
##  Exposed Control hypo - Exposed Exposed hypo    -0.0913 0.199 Inf  -0.460
##  Exposed Control hypo - Control Control hyper    2.2623 0.671 Inf   3.369
##  Exposed Control hypo - Exposed Control hyper    0.1256 0.671 Inf   0.187
##  Exposed Control hypo - Control Exposed hyper    3.0021 0.672 Inf   4.467
##  Exposed Control hypo - Exposed Exposed hyper    0.2751 0.671 Inf   0.410
##  Control Exposed hypo - Exposed Exposed hypo     3.3780 0.205 Inf  16.479
##  Control Exposed hypo - Control Control hyper    5.7317 0.673 Inf   8.514
##  Control Exposed hypo - Exposed Control hyper    3.5950 0.673 Inf   5.342
##  Control Exposed hypo - Control Exposed hyper    6.4715 0.674 Inf   9.608
##  Control Exposed hypo - Exposed Exposed hyper    3.7444 0.673 Inf   5.561
##  Exposed Exposed hypo - Control Control hyper    2.3536 0.672 Inf   3.501
##  Exposed Exposed hypo - Exposed Control hyper    0.2169 0.672 Inf   0.323
##  Exposed Exposed hypo - Control Exposed hyper    3.0935 0.673 Inf   4.597
##  Exposed Exposed hypo - Exposed Exposed hyper    0.3664 0.672 Inf   0.545
##  Control Control hyper - Exposed Control hyper  -2.1367 0.137 Inf -15.618
##  Control Control hyper - Control Exposed hyper   0.7398 0.141 Inf   5.251
##  Control Control hyper - Exposed Exposed hyper  -1.9872 0.139 Inf -14.332
##  Exposed Control hyper - Control Exposed hyper   2.8765 0.140 Inf  20.557
##  Exposed Control hyper - Exposed Exposed hyper   0.1495 0.137 Inf   1.091
##  Control Exposed hyper - Exposed Exposed hyper  -2.7271 0.141 Inf -19.290
##  p.value
##   <.0001
##   0.9761
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.9998
##   0.0172
##   1.0000
##   0.0002
##   0.9999
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.0110
##   1.0000
##   0.0001
##   0.9994
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.9589
##   <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 8 estimates
P1 <- plot(modFull_emmeans, by = "hypohyper", comparisons = TRUE) +
  # coord_flip()+
  theme_bw() +
  ggtitle("Estimated marginal means of methylation ratio (beta)\n of offspring at parental DMS")+
  theme(legend.position = "none", axis.title.x = element_blank()) +
  scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))

## NB: Comparison arrows: https://cran.r-project.org/web/packages/emmeans/vignettes/xplanations.html
## two estimated marginal means (EMMs) differ significantly if, and only if, their respective comparison arrows do not overlap
## These comparison arrows are decidedly not the same as confidence intervals.
## Confidence intervals for EMMs are based on the statistical properties of the individual EMMs, whereas comparison arrows
## are based on the statistical properties of differences of EMMs.

## Add the PARENTAL DMS value
## Same test on ALL, G1 and G2 fish
modFullG1 <- lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|brotherPairID), data = PM_G1)

modFullG1_emmeans <- emmeans(modFullG1, list(pairwise ~ G1_trt:hypohyper), adjust = "tukey")
modFullG1_emmeans
## $`emmeans of G1_trt, hypohyper`
##  G1_trt  hypohyper emmean    SE  df asymp.LCL asymp.UCL
##  Control hypo        66.9 0.657 Inf      65.7      68.2
##  Exposed hypo        49.1 0.661 Inf      47.8      50.4
##  Control hyper       48.6 0.534 Inf      47.6      49.6
##  Exposed hyper       65.8 0.536 Inf      64.7      66.8
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $`pairwise differences of G1_trt, hypohyper`
##  1                             estimate    SE  df z.ratio p.value
##  Control hypo - Exposed hypo     17.889 0.303 Inf  59.074  <.0001
##  Control hypo - Control hyper    18.343 0.643 Inf  28.539  <.0001
##  Control hypo - Exposed hyper     1.179 0.645 Inf   1.829  0.2597
##  Exposed hypo - Control hyper     0.454 0.647 Inf   0.701  0.8966
##  Exposed hypo - Exposed hyper   -16.711 0.649 Inf -25.761  <.0001
##  Control hyper - Exposed hyper  -17.164 0.209 Inf -82.107  <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates
P2 <- plot(modFullG1_emmeans, by = "hypohyper", comparisons = TRUE) +
  theme_bw() +
  ggtitle("Estimated marginal means of methylation ratio (beta)\n of parents at DMS")+
  theme(legend.position = "none", axis.title.x = element_blank()) +
  scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))

ggarrange(P2, P1, labels = c("A", "B"), ncol = 1, nrow = 2)

4.2.2 Are the nbr of residuals methylation AT PARENTAL DMS different in the 4 G2 trt? (for hypo vs hypermeth)?

length(unique(PM_G1$CpGSite))# 3648 positions
## [1] 3648
PM_G1 %>% dplyr::count(ID)## NB: not all covered in all samples
##      ID    n
## 1   S16 3300
## 2   S17 2842
## 3   S35 2747
## 4   S36 2568
## 5   S52 3037
## 6   S53 3468
## 7   S68 3348
## 8   S69 3500
## 9   S70 2237
## 10  S71 3302
## 11  S72 3479
## 12  S73 2973
## 13  S76 3239
## 14  S77 3232
## 15  S78 3434
## 16  S79 2679
## 17  S94 2564
## 18  S95 1766
## 19 S107 3414
## 20 S108 3230
## 21 S124 3387
## 22 S125 2526
## 23 S143 3097
## 24 S144 2773
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hypo"]))# 1176 positions hypomethylated upon parental inf
## [1] 1176
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hyper"]))# 2472 positions hypermethylated upon parental inf
## [1] 2472
myfun <- function(X){
  ## Calculate nbr of CpG hypo/hypermethylated per individual, and nbr of covered CpG:
  X <- X %>% group_by(ID, Treatment, brotherPairID, clutch.ID, Sex) %>%
    dplyr::summarise(ncov = n(),
                     hypoMeth = sum(BetaValue < 0.3),
                     hyperMeth = sum(BetaValue > 0.7)) %>% data.frame()
  # Calculate residuals of nbr of methCpG by nbr of covered CpG
  X$res_Nbr_methCpG_Nbr_coveredCpG_HYPO = residuals(lm(X$hypoMeth ~ X$ncov))
  X$res_Nbr_methCpG_Nbr_coveredCpG_HYPER = residuals(lm(X$hyperMeth ~ X$ncov))

  ## Statistical model: is it different in each offspring trt group?
  mod1 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  mod0 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  print(lrtest(mod1, mod0))

  ## Post-hoc test:
  modhypo <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
                  data = X)
  ## pairwise posthoc test
  print(emmeans(modhypo, list(pairwise ~ Treatment), adjust = "tukey"))

  mod3 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  mod4 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  print(lrtest(mod3, mod4))

  ## Post-hoc test:
  modhyper <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
                   data = X)
  ## pairwise posthoc test
  print(emmeans(modhyper, list(pairwise ~ Treatment), adjust = "tukey"))

  ## Plot
  phypo <- plot(ggpredict(modhypo, terms = c("Treatment")), add.data = TRUE)+
    scale_y_continuous("Residuals of number of hypomethylated methylated \ncytosines on number of cytosines covered") +
    ggtitle("Predicted residuals nbr of hypomethylated CpG")+
    theme_bw()

  phyper <- plot(ggpredict(modhyper, terms = c("Treatment")), add.data = TRUE)+
    scale_y_continuous("Residuals of number of hypermethylated methylated \n cytosines on number of cytosines covered") +
    ggtitle("Predicted residuals nbr of hypermethylated CpG")+
    theme_bw()
  return(list(phypo, phyper))
}

listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hypo",])
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df Chisq Pr(>Chisq)
## 1   8 -420.75                    
## 2   5 -423.55 -3 5.605     0.1325
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control  -4.01 4.11 16.2   -12.72     4.70
##  NE_exposed  -5.64 4.14 16.3   -14.40     3.11
##  E_control    7.45 4.12 16.1    -1.29    16.18
##  E_exposed    4.61 4.12 16.1    -4.11    13.33
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE    df t.ratio p.value
##  NE_control - NE_exposed     1.63 2.55 93.08   0.640  0.9188
##  NE_control - E_control    -11.46 5.83  8.47  -1.966  0.2722
##  NE_control - E_exposed     -8.62 5.82  8.47  -1.482  0.4875
##  NE_exposed - E_control    -13.09 5.86  8.53  -2.234  0.1893
##  NE_exposed - E_exposed    -10.25 5.85  8.53  -1.754  0.3559
##  E_control - E_exposed       2.84 2.50 92.53   1.135  0.6692
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## 
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   8 -419.75                     
## 2   5 -422.58 -3 5.6616     0.1293
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control   3.97 4.18 16.1    -4.88    12.82
##  NE_exposed   5.61 4.20 16.2    -3.29    14.51
##  E_control   -7.52 4.19 16.0   -16.40     1.35
##  E_exposed   -4.53 4.18 16.1   -13.39     4.33
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE    df t.ratio p.value
##  NE_control - NE_exposed    -1.64 2.51 93.04  -0.651  0.9148
##  NE_control - E_control     11.50 5.92  8.37   1.942  0.2815
##  NE_control - E_exposed      8.50 5.91  8.37   1.438  0.5112
##  NE_exposed - E_control     13.13 5.95  8.43   2.208  0.1971
##  NE_exposed - E_exposed     10.14 5.94  8.43   1.707  0.3771
##  E_control - E_exposed      -3.00 2.47 92.51  -1.216  0.6185
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates
## NOT significant
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
                top = text_grob("Parental DMS hypomethylated upon infection, in offspring"))

listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hyper",])
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)   
## 1   8 -476.63                        
## 2   5 -482.69 -3 12.123   0.006974 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control  13.94 5.13 17.0     3.12    24.75
##  NE_exposed   8.48 5.19 17.2    -2.46    19.42
##  E_control   -9.77 5.15 16.8   -20.66     1.11
##  E_exposed  -12.95 5.14 16.9   -23.79    -2.11
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE   df t.ratio p.value
##  NE_control - NE_exposed     5.46 4.45 93.8   1.225  0.6124
##  NE_control - E_control     23.71 7.28 10.3   3.257  0.0353
##  NE_control - E_exposed     26.88 7.26 10.3   3.701  0.0172
##  NE_exposed - E_control     18.25 7.36 10.5   2.481  0.1209
##  NE_exposed - E_exposed     21.43 7.33 10.5   2.923  0.0598
##  E_control - E_exposed       3.17 4.37 93.0   0.726  0.8864
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## 
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)   
## 1   8 -477.24                        
## 2   5 -483.28 -3 12.081    0.00711 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control -14.11 5.18 17.0   -25.04    -3.18
##  NE_exposed  -8.53 5.24 17.2   -19.58     2.52
##  E_control    9.95 5.21 16.8    -1.05    20.95
##  E_exposed   12.96 5.19 16.9     2.00    23.92
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE   df t.ratio p.value
##  NE_control - NE_exposed    -5.58 4.47 93.8  -1.247  0.5989
##  NE_control - E_control    -24.06 7.36 10.3  -3.269  0.0348
##  NE_control - E_exposed    -27.07 7.34 10.3  -3.687  0.0177
##  NE_exposed - E_control    -18.48 7.43 10.4  -2.486  0.1204
##  NE_exposed - E_exposed    -21.49 7.41 10.4  -2.901  0.0622
##  E_control - E_exposed      -3.01 4.39 93.0  -0.686  0.9021
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates
## Treatment SIGNIFICANT in both excess hypo/hyper methylation **

# $`pairwise differences of Treatment`
## HYPO
# 1                       estimate   SE   df t.ratio p.value
# NE_control - E_control     23.71 7.28 10.3   3.257  0.0353
# NE_control - E_exposed     26.88 7.26 10.3   3.701  0.0172

## HYPER
# 1                       estimate   SE   df t.ratio p.value
# NE_control - E_control    -24.06 7.36 10.3  -3.269  0.0348
# NE_control - E_exposed    -27.07 7.34 10.3  -3.687  0.0177

annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
                top = text_grob("Parental DMS hypermethylated upon infection, in offspring"))

-> The beta values in parentalDMS in offspring follow the parental pattern hypo/hyper methylated upon infection